# PSRM Application of Dictionary Nov 2025

# Print the details of the current R session (R version, platform, loaded
# packages) for reproducibility. The commented lines below appear to be pasted
# from an earlier run of this call and record the environment used then.
sessionInfo() 
#R version 4.3.3 (2024-02-29 ucrt)
#Platform: x86_64-w64-mingw32/x64 (64-bit)
#Running under: Windows 11 x64 (build 26200)

library(broom)
library(boot)
library(dplyr)
library(extrafont)
library(ggplot2)
library(glmnet)     
library(lmtest)
library(lsa)
library(LSX)
library(lubridate)
library(MASS)
library(nnet)
library(pscl)
library(quanteda)
library(quanteda.textmodels)
library(quanteda.textplots)
library(reshape)
library(reshape2)
library(showtext)
library(text2vec)
library(tmcn)
library(topicmodels)
library(tidyverse)
library(word2vec)
library(xtable)
library(xts)
library(zoo)

# Switch on showtext's automatic text rendering for graphics devices
# (presumably so the Chinese terms plotted later display correctly).
showtext_auto(enable = TRUE)

# Show the working directory; the relative .Rds paths below resolve against it.
getwd()

# Loading Data ------------------------------------------------------------

# Taiwan data frame
tw <- readRDS(file = "tw_application_dictionary_nov_2025.Rds")

# Taiwan document-feature matrix
tw_dfm <- readRDS(file = "tw_dfm_application_dictionary_nov_2025.Rds")

# Taiwan document-feature matrix (weighted)
tw_dfm_w <- readRDS(file = "tw_dfm_w_application_dictionary_nov_2025.Rds")

# Quick look at the first documents of the unweighted DFM
head(tw_dfm)
# Chinese-language terms that make up the "implicit_threat" category.
# The same vector is reused later as seed words and as highlighted plot terms.
implicit_threat <- c(
  "一切后果",
  "严重后果",
  "后果",
  "恶果",
  "不良后果",
  "自食恶果",
  "自食其果",
  "不顾后果",
  "恶劣影响",
  "玩火",
  "苦果",
  "负面影响",
  "适得其反",
  "消极影响",
  "玩火自焚",
  "自取灭亡",
  "引火自焚",
  "自寻死路",
  "后果严重"
)

# Named list of categories (currently a single key) ...
dictionary_list <- list(implicit_threat = implicit_threat)

# ... wrapped into a quanteda dictionary object for use with dfm_lookup().
full_dictionary <- dictionary(dictionary_list)


# Collapse the weighted DFM onto the dictionary keys: one column per key,
# one row per document.
tw_sent_issue_w <- dfm_lookup(tw_dfm_w, dictionary = full_dictionary)

# The scores are written into `tw` by position, so the DFM must have exactly
# one document per row of `tw`. Without this guard a base data.frame would
# silently recycle a shorter vector whenever nrow(tw) is a multiple of its
# length, attaching scores to the wrong rows.
if (ndoc(tw_sent_issue_w) != nrow(tw)) {
  stop(
    "tw_dfm_w has ", ndoc(tw_sent_issue_w), " documents but tw has ",
    nrow(tw), " rows; cannot align dictionary scores with tw.",
    call. = FALSE
  )
}

# Select the category by name rather than by column position so the result
# stays correct if further keys are added to the dictionary.
# Multiplied by 100 (presumably turning proportions from the weighted DFM into
# percentages -- confirm against how tw_dfm_w was weighted).
tw$implicit_threat <- as.numeric(tw_sent_issue_w[, "implicit_threat"]) * 100




# LSS model application---------------------------------------------------------------

# Terms to highlight in the term plot: the same words used as LSS seeds.
highlighted_terms <- c(implicit_threat)

# Group the DFM by document id (dfm_group()'s default grouping) so that each
# row corresponds to one document to be scored.
dfmat_doc <- dfm_group(tw_dfm)

# The predicted scores are written into `tw` by position. Check the alignment
# before the expensive model fit: a base data.frame would otherwise silently
# recycle a shorter vector whenever nrow(tw) is a multiple of its length.
if (ndoc(dfmat_doc) != nrow(tw)) {
  stop(
    "Grouped DFM has ", ndoc(dfmat_doc), " documents but tw has ",
    nrow(tw), " rows; cannot align LSS scores with tw.",
    call. = FALSE
  )
}

# Fit the LSS model on the unweighted DFM.
# - seeds: the implicit-threat terms, passed as a plain character vector
# - k: number of singular values requested from the SVD
# - cache: reuse a stored SVD result when the input and settings are identical
# The RNG seed is fixed immediately before the fit for reproducibility.
set.seed(1)
lss_scores <- textmodel_lss(
  tw_dfm,
  seeds = implicit_threat,
  k = 300,
  cache = TRUE
)

# Applying the LSS predicted score into the dataframe (one score per document)
tw$lss_score <- predict(lss_scores, newdata = dfmat_doc)



# Appendix Table 4 p.8 ----------------------------------------------------

# Base term plot from the fitted LSS model: every seed term is eligible for
# highlighting (max_highlighted equals the number of seed terms), and at most
# 1000 words are drawn.
base_term_plot <- textplot_terms(
  lss_scores,
  highlighted = highlighted_terms,
  max_highlighted = length(highlighted_terms),
  max_words = 1000
)

# Colour the highlighted layer blue and switch to the minimal ggplot2 theme.
text_plot <- base_term_plot +
  scale_colour_manual(values = "blue") +
  theme_minimal()

# Write the figure to disk (8 x 6 at 300 dpi).
ggsave(
  filename = "Appendix_Table_4.png",
  plot = text_plot,
  width = 8,
  height = 6,
  dpi = 300
)





